#!/usr/bin/env python
# encoding: utf-8
"""
@summary: Car attribute pictures: data for car-model picture 1
@attention: url pv
@author: hongxingfan
@since: 2014-08-08 17:21:04
"""
import sys
import re

# Compiled regex objects; filled by fillPattersList() and consulted by regex().
patterns_list = []

def regex(url):
    """Decide whether ``url`` passes the exclusion filters.

    Returns False when ``url`` contains the marker "wmh-auto-tab" or when any
    compiled pattern in ``patterns_list`` matches at the start of ``url``
    (``match`` semantics, not ``search``). Returns True otherwise.
    """
    # Original note on this check: "under the db.auto.sohu.com domain".
    if "wmh-auto-tab" in url:
        return False

    # Keep the URL only if none of the exclusion patterns matches it.
    return not any(compiled.match(url) for compiled in patterns_list)

def fillPattersList():
    """Load the regexes from ``regex_car.txt`` into ``patterns_list``.

    Each non-blank line of the file is whitespace-stripped, compiled with
    ``re.compile`` and appended to the module-level ``patterns_list``.

    If the file cannot be opened, read or decoded, or a line is not a valid
    regular expression, prints "read file is error: " followed by the last
    line read and terminates the process via ``sys.exit(0)`` (the exit status
    is kept as in the original script).
    """
    # Pre-set so the error message can be built even when open() itself fails.
    line = ""
    try:
        # The context manager closes the file on both success and failure.
        with open("regex_car.txt", "r") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # An empty pattern matches every string, which would make
                    # regex() reject every URL.
                    continue
                patterns_list.append(re.compile(line))
    except (IOError, OSError, UnicodeError, re.error):
        print("read file is error: " + line)
        sys.exit(0)
    
if __name__ == '__main__':
    # Load the patterns, then print "url<TAB>pv" for every input row whose
    # URL passes regex().
    fillPattersList()
    # NOTE: hard-coded local input path. An earlier variant (previously kept
    # as commented-out code) read rows from sys.stdin and used
    # cols[4] + cols[5] as the URL.
    with open("D:/xi_car_property_pic.txt") as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line has no pv column and would raise IndexError.
                continue
            cols = line.split("\t")
            url = cols[0]
            pv = int(cols[1])

            if regex(url):
                print("%s\t%d" % (url, pv))
